### CODE FOR FIGURE 3 AND TABLE A3 ###
library(tidyverse)
library(broom)
library(estimatr)
library(deming)
library(xtable)
library(rsample)
set.seed(78712)

# set wd
setwd("data/analysis/cleaned")

# function to extract ATEs
Get_ATE <- function(data, subsample = c(0,1)) {
      out <- tidy(lm_robust(formula(data$FORMULA[1]), data, 
                            subset = TNRFU %in% subsample))
      out %>% slice_tail() %>%
            mutate(StudyID = data$StudyId[1])
}

#### Estimate CATEs for each subgroup ####

##### WHOLE SAMPLE #####
# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

all_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

all_se_int <- sd(all_se_ints)

dem_all <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                  data = dem_dat, jackknife = T)

##### GENDER #####
## Men ##
# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, GENDER == 1)
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

men_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

men_se_int <- sd(all_se_ints)

# Deming regression
dem_men <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                  data = dem_dat, jackknife = T)

## Women ##
# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, GENDER == 2)
      
      if(nrow(d)==0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

women_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

women_se_int <- sd(all_se_ints)

# Deming regression
dem_women <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                    data = dem_dat, jackknife = T)

##### AGE (3 cat.) #####
## 18- to 39-year-olds ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, AGE3 = case_when(
            between(AGE, 18, 39) ~ "18-39",
            between(AGE, 40, 59) ~ "40-59",
            between(AGE, 60, 150) ~ "60+"
      ))
      d <- filter(d, AGE3 == "18-39")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

age1_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(age1_results, subsample == "eager")
reluctants <- filter(age1_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

age1_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

age1_se_int <- sd(all_se_ints)

# Deming regression
dem_age1 <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                   data = dem_dat, jackknife = T)

## 40- to 59-year-olds ##
# set working directory

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, AGE3 = case_when(
            between(AGE, 18, 39) ~ "18-39",
            between(AGE, 40, 59) ~ "40-59",
            between(AGE, 60, 150) ~ "60+"
      ))
      d <- filter(d, AGE3 == "40-59")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

age2_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(age2_results, subsample == "eager")
reluctants <- filter(age2_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))
# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

age2_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

age2_se_int <- sd(all_se_ints)

# Deming regression
dem_age2 <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                   data = dem_dat, jackknife = T)


## 60+-year-olds ##
# set working directory

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, AGE3 = case_when(
            between(AGE, 18, 39) ~ "18-39",
            between(AGE, 40, 59) ~ "40-59",
            between(AGE, 60, 150) ~ "60+"
      ))
      d <- filter(d, AGE3 == "60+")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

age3_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(age3_results, subsample == "eager")
reluctants <- filter(age3_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

age3_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

age3_se_int <- sd(all_se_ints)

# Deming regression
dem_age3 <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                   data = dem_dat, jackknife = T)

#### Party ID (3 cat.) ####
### Democrats ###

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, pid3 = case_when(
            between(PartyID7, 1, 3) ~ "Democrat",
            between(PartyID7, 4, 4) ~ "Independent",
            between(PartyID7, 5, 7) ~ "Republican",
            TRUE ~ NA_character_))
      d <- filter(d, pid3 == "Democrat")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

dem_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(dem_results, subsample == "eager")
reluctants <- filter(dem_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

dems_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

dems_se_int <- sd(all_se_ints)

# Deming regression
dem_dems <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                  data = dem_dat, jackknife = T)

### Independents ###

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, pid3 = case_when(
            between(PartyID7, 1, 3) ~ "Democrat",
            between(PartyID7, 4, 4) ~ "Independent",
            between(PartyID7, 5, 7) ~ "Republican",
            TRUE ~ NA_character_))
      d <- filter(d, pid3 == "Independent")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

ind_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(ind_results, subsample == "eager")
reluctants <- filter(ind_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

ind_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

ind_se_int <- sd(all_se_ints)

# Deming regression
dem_ind <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                  data = dem_dat, jackknife = T)

### Republicans ###

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, pid3 = case_when(
            between(PartyID7, 1, 3) ~ "Democrat",
            between(PartyID7, 4, 4) ~ "Independent",
            between(PartyID7, 5, 7) ~ "Republican",
            TRUE ~ NA_character_))
      d <- filter(d, pid3 == "Republican")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

rep_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(rep_results, subsample == "eager")
reluctants <- filter(rep_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

rep_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

rep_se_int <- sd(all_se_ints)

# Deming regression
dem_rep <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                  data = dem_dat, jackknife = T)


#### EDUCATION (3 cat.) ####

### HS Degree or less ###

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, EDUC4 = case_when(
            EDUC < 9 ~ "1No HS degree",
            EDUC == 9 ~ "2HS degree", 
            between(EDUC, 10, 11) ~ "3Some college",
            EDUC > 11 ~ "4BA or higher"))
      
      d <- filter(d, EDUC4 == "1No HS degree" | EDUC4 == "2HS degree")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

nohs_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(nohs_results, subsample == "eager")
reluctants <- filter(nohs_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))
# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

nohs_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

nohs_se_int <- sd(all_se_ints)

# Deming regression
dem_nohs <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                   data = dem_dat, jackknife = T)


### Some college ###

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, EDUC4 = case_when(
            EDUC < 9 ~ "1No HS degree",
            EDUC == 9 ~ "2HS degree", 
            between(EDUC, 10, 11) ~ "3Some college",
            EDUC > 11 ~ "4BA or higher"))
      
      d <- filter(d, EDUC4 == "3Some college")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))
# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

somecoll_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

somecoll_se_int <- sd(all_se_ints)

# Deming regression
dem_somecoll <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                       data = dem_dat, jackknife = T)


### BA or higher ###

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, EDUC4 = case_when(
            EDUC < 9 ~ "1No HS degree",
            EDUC == 9 ~ "2HS degree", 
            between(EDUC, 10, 11) ~ "3Some college",
            EDUC > 11 ~ "4BA or higher"))
      
      d <- filter(d, EDUC4 == "4BA or higher")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))
# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

ba_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

ba_se_int <- sd(all_se_ints)

# Deming regression
dem_ba <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                 data = dem_dat, jackknife = T)

#### INCOME (3 cat.) #### 

## Lower Third of Income ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, INCOME3 = case_when(
            between(INCOME, 1, 8) ~ "Low Income",
            between(INCOME, 9, 12) ~ "Middle Income",
            between(INCOME, 13, 18) ~ "High Income"
      ))
      d <- filter(d, INCOME3 == "Low Income")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

inc1_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(inc1_results, subsample == "eager")
reluctants <- filter(inc1_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

inc1_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

inc1_se_int <- sd(all_se_ints)

# Deming regression
dem_inc1 <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                   data = dem_dat, jackknife = T)

## Middle Third of Income ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, INCOME3 = case_when(
            between(INCOME, 1, 8) ~ "Low Income",
            between(INCOME, 9, 12) ~ "Middle Income",
            between(INCOME, 13, 18) ~ "High Income"
      ))
      d <- filter(d, INCOME3 == "Middle Income")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

inc2_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(inc2_results, subsample == "eager")
reluctants <- filter(inc2_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))
# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

inc2_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

inc2_se_int <- sd(all_se_ints)

# Deming regression
dem_inc2 <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                   data = dem_dat, jackknife = T)


## Upper Third of Income ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, INCOME3 = case_when(
            between(INCOME, 1, 8) ~ "Low Income",
            between(INCOME, 9, 12) ~ "Middle Income",
            between(INCOME, 13, 18) ~ "High Income"
      ))
      d <- filter(d, INCOME3 == "High Income")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

inc3_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(inc3_results, subsample == "eager")
reluctants <- filter(inc3_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

inc3_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

inc3_se_int <- sd(all_se_ints)

# Deming regression
dem_inc3 <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                   data = dem_dat, jackknife = T)


#### RACE ####
## White ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, RACETHNICITY == 1)
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

white_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(white_results, subsample == "eager")
reluctants <- filter(white_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

white_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

white_se_int <- sd(all_se_ints)

# Deming regression
dem_white <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                    data = dem_dat, jackknife = T)


## Non-white ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, RACETHNICITY != 1)
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

nonwhite_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(nonwhite_results, subsample == "eager")
reluctants <- filter(nonwhite_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

nonwhite_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

nonwhite_se_int <- sd(all_se_ints)

# Deming regression
dem_nonwhite <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                       data = dem_dat, jackknife = T)


#### METRO AREA ####

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, METRO == 1)
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

metro1_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(metro1_results, subsample == "eager")
reluctants <- filter(metro1_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

metro1_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

metro1_se_int <- sd(all_se_ints)

# Deming regression
dem_metro1 <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                     data = dem_dat, jackknife = T)


## Lives in a Non-metro Area ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, METRO == 0)
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

metro0_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(metro0_results, subsample == "eager")
reluctants <- filter(metro0_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))
dem_dat <- filter(dem_dat, df.r > 10) # dropping cases problematically small N

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

metro0_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

metro0_se_int <- sd(all_se_ints)

# Deming regression
dem_metro0 <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                     data = dem_dat, jackknife = T)


#### PHONE SERVICE ####

## Landline ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, PHONESERVICE == 1 | PHONESERVICE == 3)
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

landline_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(landline_results, subsample == "eager")
reluctants <- filter(landline_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))
dem_dat <- filter(dem_dat, df.r > 10) # dropping cases problematically small N

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

landline_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

landline_se_int <- sd(all_se_ints)

# Deming regression
dem_landline <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                       data = dem_dat, jackknife = T)


## Cellphone ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, PHONESERVICE == 2 | PHONESERVICE == 4)
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

cell_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(cell_results, subsample == "eager")
reluctants <- filter(cell_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

cell_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

cell_se_int <- sd(all_se_ints)

# Deming regression
dem_cell <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                   data = dem_dat, jackknife = T)



##### Constructing Dataset of Cofficients for FIGURE 3 #####
subgroup <- c("All", "Men", "Women", "Age: 18-39", "Age: 40-59", "Age: 60+",
              "Democrats", "Independents", "Republicans", "HS or less",
              "Some college", "College or more", "Low-Income", "Mid-Income",
              "High-Income", "Whites", "Non-Whites", "Metro", "Non-Metro", 
              "Landline", "Cellphone")

ints <- c(dem_all$coefficients[1],
          dem_men$coefficients[1],
          dem_women$coefficients[1],
          dem_age1$coefficients[1],
          dem_age2$coefficients[1],
          dem_age3$coefficients[1],
          dem_dems$coefficients[1],
          dem_ind$coefficients[1],
          dem_rep$coefficients[1],
          dem_nohs$coefficients[1],
          dem_somecoll$coefficients[1],
          dem_ba$coefficients[1],
          dem_inc1$coefficients[1],
          dem_inc2$coefficients[1],
          dem_inc3$coefficients[1],
          dem_white$coefficients[1],
          dem_nonwhite$coefficients[1],
          dem_metro1$coefficients[1],
          dem_metro0$coefficients[1],
          dem_landline$coefficients[1],
          dem_cell$coefficients[1]
)

se.ints <- c(all_se_int,
             men_se_int,
             women_se_int,
             age1_se_int,
             age2_se_int,
             age3_se_int,
             dems_se_int,
             ind_se_int,
             rep_se_int,
             nohs_se_int,
             somecoll_se_int,
             ba_se_int,
             inc1_se_int,
             inc2_se_int,
             inc3_se_int,
             white_se_int,
             nonwhite_se_int,
             metro1_se_int,
             metro0_se_int,
             landline_se_int,
             cell_se_int
)

ests <- c(dem_all$coefficients[2],
          dem_men$coefficients[2],
          dem_women$coefficients[2],
          dem_age1$coefficients[2],
          dem_age2$coefficients[2],
          dem_age3$coefficients[2],
          dem_dems$coefficients[2],
          dem_ind$coefficients[2],
          dem_rep$coefficients[2],
          dem_nohs$coefficients[2],
          dem_somecoll$coefficients[2],
          dem_ba$coefficients[2],
          dem_inc1$coefficients[2],
          dem_inc2$coefficients[2],
          dem_inc3$coefficients[2],
          dem_white$coefficients[2],
          dem_nonwhite$coefficients[2],
          dem_metro1$coefficients[2],
          dem_metro0$coefficients[2],
          dem_landline$coefficients[2],
          dem_cell$coefficients[2]
)

se.ests <- c(all_se_slope,
             men_se_slope,
             women_se_slope,
             age1_se_slope,
             age2_se_slope,
             age3_se_slope,
             dems_se_slope,
             ind_se_slope,
             rep_se_slope,
             nohs_se_slope,
             somecoll_se_slope,
             ba_se_slope,
             inc1_se_slope,
             inc2_se_slope,
             inc3_se_slope,
             white_se_slope,
             nonwhite_se_slope,
             metro1_se_slope,
             metro0_se_slope,
             landline_se_slope,
             cell_se_slope
)

ci.l <- c(dem_all$ci[2,1],
          dem_men$ci[2,1],
          dem_women$ci[2,1],
          dem_age1$ci[2,1],
          dem_age2$ci[2,1],
          dem_age3$ci[2,1],
          dem_dems$ci[2,1],
          dem_ind$ci[2,1],
          dem_rep$ci[2,1],
          dem_nohs$ci[2,1],
          dem_somecoll$ci[2,1],
          dem_ba$ci[2,1],
          dem_inc1$ci[2,1],
          dem_inc2$ci[2,1],
          dem_inc3$ci[2,1],
          dem_white$ci[2,1],
          dem_nonwhite$ci[2,1],
          dem_metro1$ci[2,1],
          dem_metro0$ci[2,1],
          dem_landline$ci[2,1],
          dem_cell$ci[2,1]
)

ci.h <- c(dem_all$ci[2,2],
          dem_men$ci[2,2],
          dem_women$ci[2,2],
          dem_age1$ci[2,2],
          dem_age2$ci[2,2],
          dem_age3$ci[2,2],
          dem_dems$ci[2,2],
          dem_ind$ci[2,2],
          dem_rep$ci[2,2],
          dem_nohs$ci[2,2],
          dem_somecoll$ci[2,2],
          dem_ba$ci[2,2],
          dem_inc1$ci[2,2],
          dem_inc2$ci[2,2],
          dem_inc3$ci[2,2],
          dem_white$ci[2,2],
          dem_nonwhite$ci[2,2],
          dem_metro1$ci[2,2],
          dem_metro0$ci[2,2],
          dem_landline$ci[2,2],
          dem_cell$ci[2,2]
)

n <- c(dem_all$n,
       dem_men$n,
       dem_women$n,
       dem_age1$n,
       dem_age2$n,
       dem_age3$n,
       dem_dems$n,
       dem_ind$n,
       dem_rep$n,
       dem_nohs$n,
       dem_somecoll$n,
       dem_ba$n,
       dem_inc1$n,
       dem_inc2$n,
       dem_inc3$n,
       dem_white$n,
       dem_nonwhite$n,
       dem_metro1$n,
       dem_metro0$n,
       dem_landline$n,
       dem_cell$n
)

tab <- tibble(subgroup = subgroup, ints = ints, se.ints = se.ints,
              ests = ests, se.ests = se.ests, n = n)

# Calculating Benjamini-Hochman CIs
# for slopes
slope_dat <- tab %>% 
      mutate(effect.size = abs( (ests-1)/se.ests ),
             rank = rank(-effect.size),
             alpha.hat = rank * .05/21,
             t.critical.fdr = qt(1 - (alpha.hat/2), 21),
             lower.fdr = ests - t.critical.fdr*se.ests,
             upper.fdr = ests + t.critical.fdr*se.ests) 


# for intercepts
intercept_dat <- tab %>%
      mutate(effect.size = abs( (ints)/se.ints ),
             rank = rank(-effect.size),
             alpha.hat = rank * .05/21,
             t.critical.fdr = qt(1 - (alpha.hat/2), 21),
             lower.fdr = ints - t.critical.fdr*se.ints,
             upper.fdr = ints + t.critical.fdr*se.ints) 



### Plotting slope coefficients
# for ordering points
sg.ord <- 21:1

### Making slope subfigure
sl <- slope_dat %>% 
      ggplot(aes(reorder(subgroup, sg.ord), ests)) + 
      geom_pointrange(aes(ymin = lower.fdr, ymax = upper.fdr)) + 
      coord_flip() + 
      geom_hline(yintercept = 1, linetype = 2) + 
      labs(y = "Slope Estimates & 95% CIs", x = "Subgroup") +
      theme_bw() + 
      theme(text=element_text(size=16))


# Making intercept subfigure
int <- intercept_dat %>% 
      ggplot(aes(reorder(subgroup, sg.ord), ints)) + 
      geom_pointrange(aes(ymin = lower.fdr, ymax = upper.fdr)) + 
      scale_y_continuous(breaks = c(-.1, -.05, 0, .05, .1)) +
      coord_flip() + 
      geom_hline(yintercept = 0, linetype = 2) + 
      labs(y = "Intercept Estimates & 95% CIs", x = "") +
      theme_bw() + 
      theme(text=element_text(size=16),
            axis.text.y = element_blank())

# arranging the two together
ggpubr::ggarrange(sl, int, widths = c(1.25,1))

# save
ggsave(file = "../../../results/Figure_3.pdf")


#### APPENDIX TABLE F3 ####

xtable(tab, "Coefficient Estimates and Bootstrapped Standard Errors 
       from Deming Regressions of Eager ATE on Reluctant ATE",
       digits = 3) |> 
      print(include.rownames = F) |>
      write_file(file = "../../../results/Table_F3.tex")



